import numpy as np
import pandas as pd
import json
import sys
import os
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns
import pdb
from util import utils as data_utils
# --- IPython notebook magics: this file is an exported Jupyter notebook;
# these '%' lines only execute inside an IPython session. ---
%pylab inline
%matplotlib inline
# Global matplotlib defaults used by every plot in this analysis.
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'Blues'
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
# Checkpoint to analyze; all other artifacts (grads json, other checkpoints)
# are expected to live next to it.
json_file = './cifar_results/noise/bootstrap_const_lr_001/checkpoint_200.json'
FDIR = os.path.dirname(json_file)
NUM_CLASSIFY = 10

# Plot gradients norms for the entire learning process.
# model_grads.json is a list of per-batch dicts; each tracked key (if present
# in the records) becomes one trace in `grads`.
grads_json_filename = os.path.join(FDIR, 'model_grads.json')
grads = [[], [], []]
grads_key = ['max_grad_w1_16', 'max_grad_w1_32', 'max_grad_w1_64']
if os.path.exists(grads_json_filename):
    with open(grads_json_filename, 'r') as fp:
        data = json.load(fp)
    for i, k in enumerate(grads_key):
        # BUG FIX: guard against an empty record list before peeking at
        # data[0] (previously raised IndexError on an empty json).
        if not data or data[0].get(k) is None:
            continue
        for batch_grads in data:
            grads[i].append(batch_grads[k])
def plot_grads(grads, title, x_label, y_label, figsize=(10, 8)):
    """Plot one gradient-norm trace on a fresh figure with the given labels."""
    plt.figure(figsize=figsize)
    plt.plot(grads)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
# Plot every gradient trace that was actually found in the grads json.
for trace_idx, trace in enumerate(grads):
    if trace:
        plot_grads(trace, grads_key[trace_idx], 'iterations', grads_key[trace_idx])
# Load the selected checkpoint and pull out the training/validation curves.
with open(json_file, 'r') as checkpoint_fp:
    data = json.load(checkpoint_fp)

# Loss history might not be of equal length (train/val are logged
# independently), so keep them as separate series.
train_loss_hist = data['train_loss_history']
val_loss_hist = data['val_loss_history']
def plot_loss_hist(loss_hist, title):
    """Draw one loss curve in a small standalone figure titled *title*."""
    plt.figure(figsize=(5, 4))
    plt.subplot(1, 1, 1)
    plt.plot(loss_hist)
    plt.title(title)
    plt.xlabel('time')
    plt.ylabel('loss')
    plt.show()
plot_loss_hist(train_loss_hist, 'Train Loss')
plot_loss_hist(val_loss_hist, 'Val loss')

# Auxiliary losses are optional in the checkpoint: plot each only when the
# history is present (and, where recorded lazily, non-empty).
if data.get('crit1_loss_history') is not None:
    plot_loss_hist(data['crit1_loss_history'], 'Target criterion loss')
if data.get('crit2_loss_history'):
    plot_loss_hist(data['crit2_loss_history'], 'Pred criterion loss')
if data.get('pred_loss_history'):
    plot_loss_hist(data['pred_loss_history'], 'Total Pred loss (beta*t + (1-beta)*p)')
if data.get('beta_loss_history'):
    plot_loss_hist(data['beta_loss_history'], 'Beta loss')
if data.get('KL_loss_history') is not None:
    # KL divergence loss gets its own larger figure.
    # Loss history might not be of equal length.
    kl_hist = data['KL_loss_history']
    plt.figure(figsize=(10, 8))
    plt.plot(kl_hist)
    plt.title('KL loss')
    plt.xlabel('time')
    plt.ylabel('loss')
    plt.show()
def get_conf(json_file, num_classes=26, json_key='conf'):
    """Recover a confusion matrix stored as a numpy-printed string in a json.

    The training code dumps ``str(conf_matrix)`` — numpy's textual repr,
    e.g. ``'[[1 2]\\n [3 4]]'`` — under *json_key*.  This reverses that:
    each line that looks like a bracketed row of integers is parsed and
    the (num_classes x num_classes) matrix is rebuilt.  The recovered
    matrix is also written next to the json as ``conf_<name>.txt``.

    Args:
        json_file: path to a checkpoint json (filename must end in '.json').
        num_classes: expected matrix dimension; asserted against the
            number of parsed rows.
        json_key: key holding the matrix string, e.g. 'conf', 'val_conf'
            or 'train_conf'.

    Returns:
        (num_classes, num_classes) int ndarray, or None if *json_key*
        is absent from the json.
    """
    with open(json_file, 'r') as fp:
        data = json.load(fp)
    conf = data.get(json_key, None)
    if conf is None:
        return None
    lines = conf.split('\n')
    conf_mat, row_idx = np.zeros((num_classes, num_classes)), 0
    for line in lines:
        # A row looks like '[[ 1  2]' (first), ' [ 3  4]' (middle) or
        # ' [ 3  4]]' (last); grab the token run between the brackets.
        if ']' in line and '[[' in line:
            val = line.split(']')[0].split('[[')[1].split(' ')
        elif ']' in line and '[' in line:
            val = line.split(']')[0].split('[')[1].split(' ')
        else:
            continue
        col_idx = 0
        for v in val:
            if not len(v):
                continue
            try:
                conf_mat[row_idx, col_idx] = int(v)
                col_idx = col_idx + 1
            except (ValueError, IndexError):
                # BUG FIX: was a bare `except:` — only skip non-numeric
                # tokens (e.g. '...') or overflow past num_classes columns,
                # instead of silencing every possible error.
                continue
        row_idx = row_idx + 1
    assert(row_idx == num_classes)
    conf_mat = conf_mat.astype(int)
    fdir = os.path.dirname(json_file)
    json_name = os.path.basename(json_file)[:-5]  # strip trailing '.json'
    # BUG FIX: os.path.join instead of `fdir + '/'` — when json_file has no
    # directory component, dirname() is '' and the old code wrote to '/'.
    conf_file_name = os.path.join(fdir, 'conf_' + json_name + '.txt')
    np.savetxt(conf_file_name, conf_mat, fmt='%d', delimiter=', ')
    return conf_mat
def plot_conf(norm_conf):
    """Render a normalized confusion matrix as an annotated seaborn heatmap."""
    plt.figure(figsize=(10, 6))
    frame = pd.DataFrame(norm_conf)
    sns.heatmap(frame, annot=True, cmap="Blues")
    plt.show()
def get_sorted_checkpoints(fdir):
    """List checkpoint json filenames in *fdir*, ordered by checkpoint number.

    Checkpoint files are named 'checkpoint_<N>.json'; ordering is numeric
    on N, not lexicographic on the filename.
    """
    numbered = {}
    for fname in os.listdir(fdir):
        if fname.startswith('checkpoint') and fname.endswith('json'):
            num = int(fname.split('checkpoint_')[-1].split('.')[0])
            numbered[num] = fname
    return [numbered[num] for num in sorted(numbered)]
def best_f_scores(fdir, num_classes=5):
    """Score every checkpoint in *fdir* and track the top-3 by F1.

    For each checkpoint json the validation confusion matrix is parsed,
    its F1 / kappa / weighted-F1 are printed, and the normalized matrix
    is plotted.  A running top-3 list is kept sorted ascending, so
    index -1 always holds the best checkpoint seen so far.

    Args:
        fdir: directory containing 'checkpoint_<N>.json' files.
        num_classes: confusion-matrix dimension (also the F1 weight count).

    Returns:
        (best_3_fscores, best_confs, best_checkpoints) — parallel lists
        ordered third-best to best.
    """
    best_checkpoints = [None, None, None]
    best_3_fscores = [0, 0, 0]
    best_confs = [np.array(()), np.array(()), np.array(())]
    f1_weights = np.array([1.0] * num_classes)
    sorted_checkpoint_files = get_sorted_checkpoints(fdir)
    for f in sorted_checkpoint_files:
        json_file = fdir + '/' + f
        conf = get_conf(json_file, num_classes, json_key='val_conf')
        norm_conf = data_utils.normalize_conf(conf)
        f1 = data_utils.get_f1_score(conf, f1_weights)
        kappa = data_utils.computeKappa(conf)
        wt_f1 = data_utils.computeWeightedF1(conf)
        print('file: {}, f1: {:.3f}, kappa: {:.3f}, weighted-F1: {:.3f}'.format(
            f, f1, kappa, wt_f1))
        plot_conf(norm_conf)
        # Find the highest slot whose score this checkpoint ties or beats.
        max_idx = -1
        for i in range(len(best_3_fscores)):
            if best_3_fscores[i] > f1:
                break
            max_idx = i
        if max_idx < 0:
            # BUG FIX: previously fell through with max_idx == -1 and wrote
            # best_3_fscores[-1] = f1, clobbering the current BEST score
            # with a worse one.  Skip checkpoints below the whole top-3.
            continue
        # Shift the weaker entries down, then insert the new score.
        for j in range(max_idx):
            best_3_fscores[j] = best_3_fscores[j + 1]
            best_confs[j] = best_confs[j + 1]
            best_checkpoints[j] = best_checkpoints[j + 1]
        best_3_fscores[max_idx] = f1
        best_confs[max_idx] = conf
        best_checkpoints[max_idx] = f
    return best_3_fscores, best_confs, best_checkpoints
def plot_train_conf(fdir, num_classes=5):
    """Parse, score and plot the TRAIN confusion matrix of the latest checkpoint.

    Picks the highest-numbered checkpoint in *fdir*, reads its 'train_conf'
    matrix, prints F1 / kappa / weighted-F1 and plots the normalized matrix.
    Does nothing when the directory has no checkpoints.
    """
    sorted_checkpoint_files = get_sorted_checkpoints(fdir)
    if len(sorted_checkpoint_files) > 0:
        last_checkpoint = sorted_checkpoint_files[-1]
        json_file = fdir + '/' + last_checkpoint
        conf = get_conf(json_file, num_classes=num_classes, json_key='train_conf')
        print(conf)
        norm_conf = data_utils.normalize_conf(conf)
        f1_weights = np.array([1.0] * num_classes)
        f1 = data_utils.get_f1_score(conf, f1_weights)
        kappa = data_utils.computeKappa(conf)
        wt_f1 = data_utils.computeWeightedF1(conf)
        # BUG FIX: the print referenced an undefined name `f` (NameError at
        # runtime); report the checkpoint that was actually scored.
        print('file: {}, f1: {:.3f}, kappa: {:.3f}, weighted-F1: {:.3f}'.format(
            last_checkpoint, f1, kappa, wt_f1))
        plot_conf(norm_conf)
# Summarize the run: train confusion matrix of the latest checkpoint, then
# print/plot the top-3 checkpoints by validation F1.
plot_train_conf(FDIR, num_classes=NUM_CLASSIFY)
best_f_scores(FDIR, num_classes=NUM_CLASSIFY)